import requests
import re
import os
import bs4
if __name__ == "__main__":
    # Scrape the HDU BestCoder contest-998 registration list to collect the
    # handles of every contestant registered from the target school, then walk
    # the contest ranklist and print the rank positions those handles hold.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55'
    }

    # NOTE(review): the site appears to serve GBK pages without a charset
    # header, so requests falls back to ISO-8859-1 and the school column
    # arrives as mojibake. This literal is (presumably) the Latin-1 rendering
    # of the GBK bytes for "山东科技大学" and matches the equally-mojibake page
    # text — do NOT "fix" it to the readable name without also setting
    # resp.encoding = 'gbk'. TODO confirm against a live response.
    target_school = 'É½¶«¿Æ¼¼´óÑ§'

    # Handles of contestants from the target school. A set gives O(1)
    # membership tests in the ranklist pass below.
    person = set()
    url = 'http://bestcoder.hdu.edu.cn/contestRegisterList.php?cid=998&page={}'
    for page_num in range(1, 833):
        # Fetch one registration page; timeout so a stalled server can't hang
        # the whole 800-page crawl.
        page_text = requests.get(url=url.format(page_num), headers=headers,
                                 timeout=10)
        soup = bs4.BeautifulSoup(page_text.text, 'html.parser')
        # Rows 0-1 are table headers; each page lists at most 24 registrants.
        for row in soup.find_all("tr")[2:26]:
            cells = row.find_all('td')  # parse the row's cells once
            name = cells[1].a.text
            school = cells[2].text
            if school == target_school:
                person.add(name)

    rank = []
    url = "http://bestcoder.hdu.edu.cn/contests/contest_ranklist.php?cid=998&page={}"
    for page_num in range(1, 183):
        page_text = requests.get(url=url.format(page_num), headers=headers,
                                 timeout=10)
        soup = bs4.BeautifulSoup(page_text.text, 'html.parser')
        # Row 0 is the header; each ranklist page shows up to 25 entries.
        for row in soup.find_all("tr")[1:26]:
            cells = row.find_all('td')
            pos = cells[0].text   # rank position string
            name = cells[1].a.text
            if name in person:
                rank.append(pos)

    print(rank)
